rm(list = ls(all.names = TRUE))
gc(reset=T)
rm(list=ls())

#Packages
suppressMessages({ 
  pacman::p_load(bipartite, ggplot2, tidyr, dplyr, magrittr, ineq, reldist, gridGraphics, cowplot, ggrepel, stringr, gridExtra, gtable, ggwordcloud, kableExtra, pander, ggsci, maps, viridis, lsa, reshape2, data.table) })
suppressMessages({ 
  pacman::p_load(parallel, foreach, doParallel, maxnodf, e1071, devtools, EconGeo, econetwork, lpbrim, gghighlight) })

# Read in Data -------
countryGRIDRegion <- read.csv("/Path/GRID_Nation_Region_Names.csv")

# Note - this file is a censored version of COMBINED_OUTPUT_R_NODF_Citations.csv.gz
# You need to subset on 
# Citation Window = 5
# Citation_Logged = Log
# Sample_Stratified = Censored20
# Citation_Deflated = Deflated
NODF_df <- read.csv("/Path/Cen20Log_NODF_df.csv") 

# For the revision at Research Policy, we only include the updated values for 
# for the specified parameters in the text (e.g., citation window, logged, etc.)
Ubqt_df <- lapply(list.files(path = "/Path/", pattern = "OUTPUT_R_NODF_Ubiquity_Censored20_Deflated_Window5_Log.*\\.csv.gz", full.names = TRUE), read.csv)%>%
  do.call(rbind, .)

# Note - this file is a censored version of COMBINED_OUTPUT_R_NODF_Diversity.csv.gz
# You need to subset on 
# Citation Window = 5
# Citation_Logged = Log
# Sample_Stratified = Censored20
# Citation_Deflated = Deflated
Dvst_df <- read.csv("/Path/Cen20Log_Diversity_df.csv") 

# Note - this file is a censored version of COMBINED_OUTPUT_R_NODF_Matrix_Citations.csv.gz
# You need to subset on 
# Citation Window = 5
# Citation_Logged = Log
# Sample_Stratified = Censored20
# Citation_Deflated = Deflated
Matrix_df <- read.csv("/Path/Cen20Log_Matrix_df.csv")

# Note - the file is a censored version of COMBINED_OUTPUT_R_Ideal_NODF_Matrix_Citations.csv.gz
# You need to subset on 
# Citation Window = 5
# Citation_Logged = Log
# Sample_Stratified = Censored20
# Citation_Deflated = Deflated
Ideal_Matrix_df <- read.csv("/Path/Cen20Log_Ideal_Matrix_df.csv")

Logit_Filenames <- list.files(path="/Path", pattern="OUTPUT_R_Logit_Coefficients_BASELINE_*", full.names=TRUE)
Logit_df <- plyr::ldply(Logit_Filenames, read.csv) 

GRID_Ranking <- read.csv("/Path/INPUT_AWRU_2005_2018.csv",header=T) %>%
  subset(Year>=2005 & Year<=2012)

# Figure | Matrix NODF and GINI ----------

pltNODF <- ggplot(data=NODF_df, aes(x=Year, y=NODF_Normed, group=Discipline)) +
  geom_line(aes(group=Discipline), alpha=0.3)+
  geom_smooth(aes(group=1), method="lm", show.legend = F, color="#EE0000FF")+
  ggpubr::stat_cor(aes(group=1, label = paste(..r.label.., if_else(readr::parse_number(..p.label..) < 0.001, "p<0.001", ..p.label..), sep = "*`, `~")), method="pearson", label.x.npc="left", label.y.npc = "top", size=5) + 
  theme_bw() + 
  labs(y="Normalized NODF") +
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        axis.line = element_line(colour = "black"),
        strip.background = element_blank(), strip.placement = "outside",
        legend.title=element_blank()) +
  guides(colour = guide_legend(override.aes = list(alpha = 1, size = 5))) +
  theme(plot.margin = unit(c(0,0.5,0,0.5), "cm"), 
        legend.margin = margin(unit(c(5,20,5,5), "cm"))) +
  geom_label_repel(data=subset(NODF_df, Year==1990),
                   aes(label=Discipline)) +
  scale_y_continuous(limits = c(0.15, 0.6))

pltGINI <- ggplot(data=NODF_df, aes(x=Year, y=Gini_Labels, group=Discipline)) +
  geom_line(aes(group=Discipline), alpha=0.3)+
  geom_smooth(aes(group=1), method="lm", show.legend = F, color="#184BBFFF")+
  ggpubr::stat_cor(aes(group=1, label = paste(..r.label.., if_else(readr::parse_number(..p.label..) < 0.001, "p<0.001", ..p.label..), sep = "*`, `~")), method="pearson", label.x.npc="left", label.y.npc = "top", size=5) + 
  theme_bw() + labs(y="Gini Index") +
  theme(text = element_text(size=16),
        panel.border = element_blank(), 
        axis.line = element_line(colour = "black"),
        strip.background = element_blank(), strip.placement = "outside",
        legend.title=element_blank()) +
  guides(colour = guide_legend(override.aes = list(alpha = 1, size = 5))) +
  theme(plot.margin = unit(c(0,0.5,0,0.5), "cm"), 
        legend.margin = margin(unit(c(5,20,5,5), "cm"))) +
  geom_label_repel(data=subset(NODF_df, Year==1990),aes(label=Discipline)) +
  scale_y_continuous(limits = c(0.15, 0.51))

new_ngp1 <- plot_grid(ggpubr::ggarrange(pltNODF, pltGINI,
                                        labels = c("", "B"), nrow=2, ncol=1, widths = c(2, 2), align ="h"))

ngp2 <- ggplot(data=NODF_df, aes(x=NODF_Normed, y=Gini_Labels)) +
  geom_point(aes(colour=Year),alpha=0.9, size=3) +
  theme_bw()+ labs(x="Normalized NODF", y="Gini Index") + 
  ggpubr::stat_cor(aes(group=1, label = paste(..r.label.., if_else(readr::parse_number(..p.label..) < 0.001, "p<0.001", ..p.label..), sep = "*`, `~")), method = "pearson", label.x.npc = 'left', label.y = 0.52, size=5)+ #0.695
  geom_smooth(aes(group=1),method="lm", color="#0FA057FF")+
  theme(legend.position="bottom",
        text = element_text(size=16),
        panel.border = element_blank(), 
        axis.line = element_line(colour = "black"),
        strip.background = element_blank(), 
        strip.placement = "outside",
        legend.title=element_text(vjust=0.8)) +
  theme(legend.margin = margin(unit(c(5,20,5,5), "cm"))) + #top, right, bottom, left
  scale_color_viridis_c(option = "C", name="Year", breaks=c(1990,2001, 2012), limits=c(1990, 2012)) + theme(plot.margin = unit(c(0,0.5,0,0.5), "cm"))

### N for correlation 

dim(NODF_df)

## Combining NODF and Gini Plots ------

PLOT_NODF_Gini <- plot_grid(ggpubr::ggarrange(plot_grid(new_ngp1, NULL, ncol=1, rel_heights=c(6,0.5)),
                                     ngp2, labels = c("A", "C"), nrow=1, ncol=2, 
                                     widths = c(2.1, 2.55), align ="h"))
  
# Figure | Diversity Trends -------

##By GRID -----
#SE means over time by GRID

Dvst_dfavg <- Dvst_df %>% 
  group_by(GRIDs,Country, Year)%>%
  dplyr::summarize(SE_Mean = mean(Shannon_Entropy,na.rm=T),
                   Number_of_Fields = length(Shannon_Entropy))%>%
  group_by(GRIDs,Country)%>%
  dplyr::mutate(Number_of_Years = length(SE_Mean))%>%
  as.data.frame() %>%
  subset(Number_of_Fields==max(Number_of_Fields) & Number_of_Years==max(Number_of_Years))%>%
  subset(select=-c(Number_of_Fields,Number_of_Years))%>%
  mutate(Country = factor(Country, levels = unique(Country))) %>%
  mutate(grid_short = as.character(GRIDs)) %>%
  mutate_at("grid_short", str_trunc, width = 25, ellipsis = '...')

### N for correlation ------

Dvst_dfavg %>% dim()

#average line -------
TotalAvg <- Dvst_dfavg%>%group_by(Year)%>%  dplyr::summarize(SE_Mean = mean(SE_Mean,na.rm=T))%>%
  mutate(GRIDs="Global Average", Country="Global Average", grid_short="Global Average")

Dvst_dfavg <- rbind(TotalAvg, Dvst_dfavg)

#finding tops to highlight
#Dvst_dfavg%>%group_by(GRIDs)%>%subset(Year>=2005)%>%dplyr::summarize(SE_Mean = mean(SE_Mean,na.rm=T))%>%arrange(desc(SE_Mean))
highlightGRID <- c("Harvard University", "University of Toronto", "University of Cambridge","University of Nairobi","University of Hyderabad","Universidad de Los Andes")

#plotting
pgrid <- ggplot(data=Dvst_dfavg, aes(x=Year, y=SE_Mean, group=GRIDs))+
  geom_line(size=1) +
  theme_bw()+ labs(y="Mean Entropy") +
  gghighlight(GRIDs %in% highlightGRID, label_key=grid_short, calculate_per_facet=TRUE, use_direct_label=TRUE,
              unhighlighted_params = list(colour = "gray85")) +
  theme(text = element_text(size=14),
        panel.border = element_blank(), 
        axis.line = element_line(colour = "black"))+
  theme(plot.margin = unit(c(0,0.5,0,0.5), "cm")) +
  geom_line(data=Dvst_dfavg%>%subset(GRIDs=="Global Average"),  aes(x=Year, y=SE_Mean), color="#008B45FF", size=2) +
  ggpubr::stat_cor(data=Dvst_dfavg, aes(x=Year, y=SE_Mean, group=1, label = paste(..r.label.., if_else(readr::parse_number(..p.label..) < 0.001, "p<0.001", ..p.label..), sep = "*`, `~")), method = "pearson", label.x.npc = 'left', label.y.npc = "top", show.legend = F) +
  scale_y_continuous(limits=c(0.5, 2.5)) 

## By Country -------
#SE max scores over time by Country
Dvst_dfavgC <- Dvst_df %>% 
  group_by(Country, Year)%>%
  dplyr::summarize(SE_Max = max(Shannon_Entropy,na.rm=T),
                   Number_of_GRIDs_Fields = length(Shannon_Entropy))%>%
  group_by(Country)%>%
  dplyr::mutate(Number_of_Years = length(SE_Max))%>%
  as.data.frame() %>%
  subset(Number_of_GRIDs_Fields>1 & Number_of_Years==max(Number_of_Years))%>%
  subset(select=-c(Number_of_Years,Number_of_GRIDs_Fields)) %>%
  mutate(Country = factor(Country, levels=unique(Country)))

#average line
TotalAvgC <- Dvst_dfavgC%>%group_by(Year)%>%dplyr::summarize(SE_Max = mean(SE_Max,na.rm=T))%>%
  mutate(Country="Global Average")

Dvst_dfavgC <- rbind(TotalAvgC, Dvst_dfavgC)

#finding tops to highlight
#Dvst_dfavgC%>%group_by(Country)%>%dplyr::summarize(SE_Max = max(SE_Max,na.rm=T))%>%arrange(desc(SE_Max))
highlightCountry <- c("Canada", "United States", "United Kingdom","China","Ghana","Georgia","Bangladesh")

#plotting
pnat <- ggplot(data=Dvst_dfavgC, aes(x=Year, y=SE_Max, group=Country))+
  geom_line(size=1) +
  theme_bw()+ labs(y="Max Entropy") +
  geom_line(data=Dvst_dfavgC%>%subset(Country=="Global Average"),  aes(x=Year, y=SE_Max), color="#008B45FF", size=2) +
  gghighlight(Country %in% highlightCountry, label_key=Country, calculate_per_facet=TRUE, use_direct_label=TRUE,
              unhighlighted_params = list(colour = "gray85")) +
  theme(text = element_text(size=14),
        panel.border = element_blank(), 
        axis.line = element_line(colour = "black"))+
  theme(plot.margin = unit(c(0,0.5,0,0.5), "cm")) +
  ggpubr::stat_cor(data=Dvst_dfavgC, aes(x=Year, y=SE_Max, group=1, label = paste(..r.label.., if_else(readr::parse_number(..p.label..) < 0.001, "p<0.001", ..p.label..), sep = "*`, `~")), method = "pearson", label.x.npc = 'left', label.y.npc = "top", show.legend = F)    

#putting together A&B
pdvst <- plot_grid(pgrid, NULL, pnat,
                   nrow=2, labels=c("A", "" ,"B"), rel_widths=c(1, -0.02, 1))

#####Adding top 10 information######
top10GRID <- Dvst_dfavg %>% group_by(grid_short) %>% subset(Year>=2005) %>% dplyr::summarize(SE_Mean = mean(SE_Mean,na.rm=T)) %>% arrange(desc(SE_Mean)) %>% slice(0:10, 327:336) %>% 
  mutate(grid_short = factor(grid_short, levels=unique(grid_short))) 

top10NAT <- Dvst_dfavgC %>% group_by(Country) %>% subset(Year>=2005) %>% dplyr::summarize(SE_Max = mean(SE_Max,na.rm=T)) %>% arrange(desc(SE_Max)) %>% slice(0:10, 59:68) %>% 
  mutate(Country = factor(Country, levels=unique(Country))) 

top10GRID_df <- Dvst_dfavg%>%subset(grid_short %in% top10GRID$grid_short) %>% 
  mutate(grid_short = factor(grid_short, levels=top10GRID$grid_short))

top10NAT_df <- Dvst_dfavgC%>%subset(Country %in% top10NAT$Country) %>% 
  mutate(Country = factor(Country, levels=top10NAT$Country))

#adding in global average boxplot to C and D
#getting global avg in the middle 
orderGRID <- c(levels(top10GRID_df$grid_short)[1:10], "Global Average", levels(top10GRID_df$grid_short)[11:20])

orderNAT <- c(levels(top10NAT_df$Country)[1:10], "Global Average", levels(top10NAT_df$Country)[11:20])

#new data with global avg 
top10GRID_df2 <- top10GRID_df %>% rbind(TotalAvg) %>% mutate(grid_short = factor(grid_short, levels=orderGRID)) 

top10NAT_df2 <- top10NAT_df %>% rbind(TotalAvgC) %>% mutate(Country = factor(Country, levels=orderNAT)) 

#new colors with green in middle 
x_cols2 <- c(rep("#EE0000FF", each=10), "#008B45FF", rep("#3B4992FF", each=10))

#new plotting
pdvstGRID2 <- ggplot(top10GRID_df2, aes(x=grid_short, y=SE_Mean, color=grid_short)) +
  geom_boxplot() +
  scale_color_manual(values=ifelse(levels(top10GRID_df2$grid_short) == "Global Average", "#008B45FF", "black")) +
  labs(y="Mean Entropy", x=NULL, color=NULL) +
  theme_bw() + 
  theme(legend.position = "none", 
        plot.margin = unit(c(0,0.5,0,0.7), "cm"),
        axis.text.x=element_text(size=11, angle = 27, hjust=0.9, vjust =0.95, 
                                 lineheight=0.7, color=rev(x_cols2)))

pdvstNAT2 <- ggplot(top10NAT_df2, aes(x=Country, y=SE_Max, color=Country)) +
  geom_boxplot() +
  scale_color_manual(values=ifelse(levels(top10NAT_df2$Country) == "Global Average", "#008B45FF", "black")) +
  labs(y="Max Entropy", x=NULL, color=NULL) +
  theme_bw() +
  theme(legend.position = "none", 
        plot.margin = unit(c(0,0.5,0,0.7), "cm"),
        axis.text.x=element_text(size=11, angle = 27, hjust=0.9, vjust =0.95, 
                                 lineheight=0.7, color=rev(x_cols2)))  
## Combing Plot ---------------------------------------

PLOT_Entropy <- pdvst

# Figure | Entropy and Ranking --------

plot_entropy_by_ranking <- merge(Dvst_df%>%
                                   subset(Year>=2005 & Year<=2012),
                                 GRID_Ranking%>%
                                   dplyr::rename(GRIDs = University),
                                 by.x = c("Year","GRIDs"),
                                 by.y = c("Year","GRIDs"),
                                 all.x = T)%>%
  dplyr::mutate(World_Rank_Bucket = case_when(is.na(World_Rank_Bucket)==TRUE ~ "Unranked",
                                              World_Rank_Bucket %in% c("101-150","101-151","101-152","102-150","151-200","151-202","152-200","153-202") ~ "101-200",
                                              World_Rank_Bucket %in% c("201-300","201-302","203-300","203-304") ~ "201-300",
                                              World_Rank_Bucket %in% c("301-400","303-401","305-402") ~ "301-400",
                                              World_Rank_Bucket %in% c("401-500","402-501","402-503","403-510") ~ "401-500",
                                              TRUE~ World_Rank_Bucket)) %>%
  dplyr::mutate(Year = factor(Year))%>%
  dplyr::group_by(Discipline,World_Rank_Bucket) %>%
  dplyr::summarise(Entropy_Mean = mean(Shannon_Entropy,na.rm=T),
                   Entropy_Median = median(Shannon_Entropy,na.rm=T),
                   Entorpy_SE = sd(Shannon_Entropy,na.rm = T)/sqrt(length(Shannon_Entropy))) %>%
  ggplot(data=.,
         aes(x=World_Rank_Bucket,y=Entropy_Mean,group = Discipline))+
  geom_errorbar(aes(y = Entropy_Mean,
                    ymax = Entropy_Mean+Entorpy_SE,
                    ymin = Entropy_Mean-Entorpy_SE),
                show.legend = FALSE)+
  geom_point(aes(group=Discipline))+
  geom_line(aes(group=Discipline))+
  theme_bw()+
  # ggpubr::stat_cor(aes(x=World_Rank_Bucket,
  #                      y=Entropy_Mean,
  #                      group=Discipline,
  #                      label = paste(..r.label..,if_else(readr::parse_number(..p.label..) < 0.001, "p<0.001", ..p.label..), 
  #                                    sep = "*`, `~")), 
  #                  inherit.aes = FALSE,
  #                  method = "pearson", 
  #                  label.y.npc="top", 
  #                  label.x.npc = "left",
  #                  show.legend = F) +
  theme(panel.border = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), 
        strip.placement = "outside")+
  theme(legend.background = element_rect(colour = "black")) +
  theme(legend.title=element_blank())+
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + 
  xlab("University Rank")+
  ylab("Mean Entropy") + 
  ggsci::scale_color_jama()+
  facet_wrap(~Discipline,scales = "free")


pdvst_10_2 <- plot_grid(pdvstGRID2, pdvstNAT2, align = "hv", ncol=1, labels=c("A", "B"))

PLOT_Entropy_and_Ranking <- ggpubr::ggarrange(pdvst_10_2,ggpubr::ggarrange(plot_entropy_by_ranking,labels=c("C")),nrow=2)

# Figure | Predicting -------
logit2prob <- function(logit){
  odds <- exp(logit)
  prob <- odds / (1 + odds)
  return(prob)
}

coef_predictions <- Logit_df %>% 
  mutate(Delta_T = paste("Year + ",as.character(Delta_T),sep="")) %>% 
  subset(Coefficients=="Predicted_Value_in_Present" & pvalue<=0.025) %>%
  mutate(OddsRatio = exp(Estimate)) 
  #mutate(Display_Estimate = logit2prob(Estimate))

beta_discipline_delta_df <- coef_predictions%>%
  subset(Year>=1990 & Year<=2011)%>%
  mutate(Decade = case_when(Year<2000 ~ "1990s",
                            TRUE ~ "2000s"))%>%
  dplyr::group_by(Discipline,Delta_T)%>%
  dplyr::summarise(Beta=mean(OddsRatio),
                   Beta_SE = sd(OddsRatio,na.rm=T)/sqrt(length(OddsRatio)),
                   N=length(OddsRatio)) %>%
  #dplyr::mutate(Beta_Label=paste(as.character(round(Beta,2)*100),"%\n","N=",as.character(N),sep=""))%>%
  as.data.frame() %>% 
  group_by(Discipline) %>% 
  mutate(SumBeta = sum(Beta)) %>% arrange(SumBeta) %>% ungroup() %>%
  mutate(Discipline=factor(Discipline, levels=unique(Discipline)))

### Number of Unique Models ------

coef_predictions %>%
  subset(Year>=1990 & Year<=2012) %>%
  select(Discipline,Year,Delta_T) %>% 
  unique() %>% 
  dim()

coef_predictions %>%
  subset(Year>=1990 & Year<=2012) %>%
  dplyr::summarise(Beta=mean(OddsRatio),
                   Beta_SE = sd(OddsRatio,na.rm=T)/sqrt(length(OddsRatio)))


coef_predictions %>%
  subset(Year>=1990 & Year<=2012 & Discipline == "Economics") %>%
  dplyr::summarise(Beta=mean(OddsRatio),
                   Beta_SE = sd(OddsRatio,na.rm=T)/sqrt(length(OddsRatio)))

## Plotting coefficients -------


coef_predictions%>%
  subset(Year>=1990 & Year<=2011)%>%
  ggplot(data=.,
          aes(x=Delta_T,y=OddsRatio))+
  geom_errorbar(aes(ymin = OddsRatio - `Std..Error`,
                    ymax = OddsRatio + `Std..Error`,
                    colour=Year),
                alpha=0.25)+
  theme_bw()+
  geom_point(aes(colour=Year))+
  xlab("")+
  ylab("Odds Ratio of a Future Presence")+
  theme(panel.border = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.background = element_rect(colour = "black")) +
  theme(legend.title=element_blank())+
  theme(axis.text.x=element_text(angle=45,hjust=1)) +
  scale_color_viridis_c(option = "C", name="Year", breaks=c(1990,2001, 2012), limits=c(1990, 2012)) + theme(plot.margin = unit(c(0,0.5,0,0.5), "cm"))+
  facet_wrap(~Discipline,nrow=2,scales="free_x")

plot_betas <- beta_discipline_delta_df %>%
  ggplot(data=.,aes(x=Delta_T,y=Beta))+
  theme_bw()+
  geom_point()+
  geom_line(aes(group=1))+
  xlab("")+
  ylab("Odds Ratio of a Future Presence")+
  geom_errorbar(aes(ymin = Beta - Beta_SE,
                    ymax = Beta + Beta_SE))+
  #scale_y_continuous(labels = scales::percent)+
  theme(panel.border = element_blank(), 
        axis.line = element_line(colour = "black")) +
  theme(legend.position="bottom")+
  theme(strip.background = element_blank(), strip.placement = "outside")+
  theme(legend.background = element_rect(colour = "black")) +
  theme(legend.title=element_blank())+
  theme(axis.text.x=element_text(angle=45,hjust=1)) +
  facet_wrap(~Discipline,nrow=2,scales="free_x")

coef_predictions2 <- coef_predictions%>%group_by(Delta_T, Year)%>%
  summarise(OddsRatio = mean(OddsRatio),
            NODF_Normed = mean(NODF_Normed))

newpredictB <- ggplot(data=coef_predictions%>%
                        subset(Year<=2012 & Year>=1990)%>%
                        mutate(Decade = case_when(Year<2000~"1990s",
                                                  TRUE~"2000s")), 
                      aes(x=NODF_Normed, y=OddsRatio))+
  geom_point(aes(colour=Year),alpha=0.9, size=3) +
  geom_smooth(aes(group=1),method="lm", color="#0FA057FF")+
  ggpubr::stat_cor(aes(group=1, label = paste(..r.label.., if_else(readr::parse_number(..p.label..) < 0.001, "p<0.001", ..p.label..), sep = "*`, `~")), method = "pearson", label.x.npc = 'left', label.y.npc = "top", show.legend = F)+
  theme_bw()+
  labs(x="Normalized NODF", y="Odds Ratio of a Future Presence") +
  scale_color_viridis_c(option = "C", name="Year", direction=-1, 
                        breaks=c(1990,2001, 2012), limits=c(1990, 2012)) + 
  #scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme(legend.position="bottom",
        text = element_text(size=14),
        panel.border = element_blank(), 
        axis.line = element_line(colour = "black"),
        axis.title=element_text(size=13),
        strip.background = element_blank(), 
        strip.placement = "outside") +
  theme(plot.margin = unit(c(0,0.5,0,0.5), "cm"), 
        legend.margin = margin(unit(c(-8,0,0,20), "cm")),
        legend.text = element_text(size=10),
        legend.title = element_text(size=10,vjust=0.8, hjust=0.5))+
  facet_grid(Decade~Delta_T)

## Combining Prediction ------

PLOT_Prediction <- plot_grid(plot_betas, 
                   newpredictB, 
                   nrow=2, 
                   align="h", 
                   labels=c("A", "B"))

# Output -------
pdf("/Path/FIGURE_NODF_over_Time.pdf",width=(7*1.25), height=7)
plot(pltNODF)
dev.off()


pdf("/Path/FIGURE_NODF_and_Gini.pdf",width=(10*1.5), height=10)
plot(PLOT_NODF_Gini)
dev.off()

pdf("/Path/FIGURE_Entropy.pdf",width=(8*1), height=8*1.25)
plot(PLOT_Entropy)
dev.off()

pdf("/Path/FIGURE_Entropy_and_Ranking.pdf",width=(9*1), height=9*1.2)
plot(PLOT_Entropy_and_Ranking)
dev.off()

pdf("/Path/FIGURE_Prediction.pdf",width=(6*1.5), height=6*1)
plot(plot_betas)
dev.off()

pdf("/Path/FIGURE_Prediction_NODF.pdf",width=(6*1.5), height=6*1)
plot(newpredictB)
dev.off()